Multiples factors play an important function during the naive CD4-T cell differentiation where Blimp1 (encoded by Prdm1) and Bcl6 are the masters. Taking advantatge of the Cicero’s prediction, we are going to characterize the accessibility pattern not only at th level of specific gene, but also at the level of the co-accessibility regions to which gene belongs.
library(Seurat)
library(Signac)
library(GenomicRanges)
library(pheatmap)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(data.table)
library(ggpubr)
library(plyr)
library(stringr)
color_palette <- c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D",
"#5A5156", "#AA0DFE", "#F8A19F", "#F7E1A0",
"#1C8356", "#FEAF16", "#822E1C", "#C4451C",
"#1CBE4F", "#325A9B", "#F6222E", "#FE00FA",
"#FBE426", "#16FF32", "black", "#3283FE",
"#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",
"#90AD1C", "#FE00FA", "#85660D", "#3B00FB",
"#822E1C", "coral2", "#1CFFCE", "#1CBE4F",
"#3283FE", "#FBE426", "#F7E1A0", "#325A9B",
"#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD",
"#FEAF16", "#683B79", "#B10DA1", "#1C7F93",
"#F8A19F", "dark orange", "#FEAF16",
"#FBE426", "Brown")
cell_type <- "CD4_T"
path_to_obj <- paste0(
here::here("scATAC-seq/results/R_objects/level_5/"),
cell_type,
"/04.",
cell_type,
"_integration_peak_calling_level_5.rds",
sep = ""
)
path_to_cicero <- paste0(
here::here("scATAC-seq/results/Cicero/"),
cell_type,"/",
sep = ""
)
path_to_obj_general <- here::here("scATAC-seq/results/R_objects/8.4.tonsil_peakcalling_annotation_level1_chromVar.rds")
upstream <- 2000
plot_dim <- function(seurat, group){
DimPlot(seurat,
group.by = group,
cols = color_palette,
pt.size = 0.1,raster=FALSE)
}
mat_heatmap <- function(seurat, features, group,
cutree_ncols,cutree_nrows){
avgexpr_mat <- AverageExpression(
features = features,
seurat,
assays = "peaks_level_5",
return.seurat = F,
group.by = group,
slot = "data")
p1 <- pheatmap(avgexpr_mat$peaks_level_5, scale = "row",
angle_col = 45,
show_rownames=T,
border_color = "white",
cluster_rows = T,
cluster_cols = T,
fontsize_col = 10,
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
clustering_method = "ward.D2",
cutree_rows = cutree_nrows,
cutree_cols = cutree_ncols)
return(p1)}
gene_reference <- here::here("scATAC-seq/Cicero/genes.gtf")
gtf <- rtracklayer::import(gene_reference)
gtf_df <- as.data.frame(gtf)
gtf_df_gene <- gtf_df[gtf_df$type == "gene",]
DT::datatable(gtf_df_gene)
# We are going to add 2000 bp upstream of each gene to take in acccount the proximal promoter regions
gtf_df_gene$start <- gtf_df_gene$start - upstream
tonsil <- readRDS(path_to_obj_general)
tonsil
## An object of class Seurat
## 271530 features across 101075 samples within 2 assays
## Active assay: peaks_macs (270784 features, 270784 variable features)
## 1 other assay present: chromvar
## 3 dimensional reductions calculated: umap, lsi, harmony
tonsil_peaks <- tonsil@assays$peaks_macs@ranges
plot_dim(tonsil, group = "annotation_level_1")
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 93116 features across 16383 samples within 1 assay
## Active assay: peaks_level_5 (93116 features, 92407 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
seurat_peaks <- seurat@assays$peaks_level_5@ranges
plot_dim(seurat, group = "annotation_paper")
At low level of resolution, we want to detect the main epigenomic changes between Non-Tfh vs Tfh. For this reason, we decide to group the cells in 4 clusters: Non-Tfh, Tfh, Central Memory and Naive.
seurat@meta.data <- seurat@meta.data %>% mutate(Group =
case_when(annotation_paper == "Naive" ~ "Naive",
annotation_paper == "CM Pre-non-Tfh" ~ "Central Memory",
annotation_paper == "CM PreTfh" ~ "Central Memory",
annotation_paper == "T-Trans-Mem" ~ "Non-Tfh",
annotation_paper == "T-Eff-Mem" ~ "Non-Tfh",
annotation_paper == "T-helper" ~ "Non-Tfh",
annotation_paper == "Tfh T:B border" ~ "Tfh",
annotation_paper == "Tfh-LZ-GC" ~ "Tfh",
annotation_paper == "GC-Tfh-SAP" ~ "Tfh",
annotation_paper == "GC-Tfh-0X40" ~ "Tfh",
annotation_paper == "Tfh-Mem" ~ "Tfh",
annotation_paper == "Memory T cells" ~ "Non-Tfh",
annotation_paper == "Eff-Tregs" ~ "Non-Tfh",
annotation_paper == "non-GC-Tf-regs" ~ "Non-Tfh",
annotation_paper == "GC-Tf-regs" ~ "Non-Tfh"))
plot_dim(seurat, group = "Group")
bcl6 <- gtf_df_gene[which(gtf_df_gene$gene_name %in% "BCL6"),]
bcl6_gr <- makeGRangesFromDataFrame(bcl6)
prdm1 <- gtf_df_gene[which(gtf_df_gene$gene_name %in% "PRDM1"),]
prdm1_gr <- makeGRangesFromDataFrame(prdm1)
conns <- read.table(paste0(path_to_cicero,
"level_5_conns.tsv"), header = T)
ccans <- read.table(paste0(path_to_cicero,
"level_5_ccans.tsv"), header = T)
links_T <- ConnectionsToLinks(conns = conns,
ccans = ccans, threshold = 0)
Links(seurat) <- links_T
# Overlapping bcl6 with seurat_peaks
overlapping_bcl6_peaks <- seurat_peaks[queryHits(findOverlaps(seurat_peaks,
bcl6_gr)),]
bcl6_seurat_peaks_features <- paste0(
seqnames(overlapping_bcl6_peaks),"-",
start(overlapping_bcl6_peaks),"-",
end(overlapping_bcl6_peaks))
# Overlapping bcl6 with links
overlapping <- links_T[queryHits(findOverlaps(links_T, bcl6_gr)),]
overlapping_peaks <- seurat_peaks[queryHits(findOverlaps(seurat_peaks,
overlapping)),]
features <- paste0(seqnames(overlapping_peaks),"-",
start(overlapping_peaks),"-",
end(overlapping_peaks))
region_bcl6_links <- paste0(unique(seqnames(overlapping_peaks)),"-",
min(start(overlapping_peaks)),"-",
max(end(overlapping_peaks)))
#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/bcl6_coverage_plot.pdf"),
# width = 10,
# height = 6)
region.highlight_enhancer <- GRanges(seqnames = "chr3",
ranges = IRanges(start = 187910000,
end = 188001000))
CoveragePlot(object = seurat,
region.highlight = region.highlight_enhancer,
group = "annotation_paper",
region = region_bcl6_links)
mat_heatmap(seurat = seurat,
features = features,
group = "Group",
cutree_ncols = 2,
cutree_nrows = 1)
mat_heatmap(seurat = seurat,
features = features,
group = "annotation_paper",
cutree_ncols = 2,cutree_nrows = 1)
enhancer_region <- overlapping_peaks[start(overlapping_peaks) >= 187900000,]
enhancer_features <- unique(paste0("chr3", "-",
start(ranges(enhancer_region)), "-",
end(ranges(enhancer_region))))
region_plot <- paste0("chr3", "-",
min(start(ranges(enhancer_region))),"-",
max(end(ranges(enhancer_region))))
CoveragePlot(object = seurat,
region.highlight = region.highlight_enhancer,
group = "annotation_paper",
region = region_plot)
Finally, we can compute a combinatory score taking into account the peaks located at BCL6 gene level and the peaks located on the super enhancer region.
seurat = AddModuleScore(
object = seurat,
features = list(bcl6_seurat_peaks_features),
name = 'BCL6_gene')
seurat = AddModuleScore(
object = seurat,
features = list(enhancer_features),
name = 'BCL6_enhancer')
bcl6_umap <- FeaturePlot(
seurat, min.cutoff = "q5",
max.cutoff = "q95",
raster = F,
features = "BCL6_gene1",
pt.size = 0.3)
bcl6_enhancer_umap <- FeaturePlot(seurat,
min.cutoff = "q5",
max.cutoff = "q95",
raster = F,
features = "BCL6_enhancer1",
pt.size = 0.3)
#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/bcl6_umap_enhancer.pdf"),
# width = 10,
# height = 6)
join = bcl6_umap | bcl6_enhancer_umap
print(join)
#dev.off()
overlapping_bcl6_gw <- tonsil_peaks[queryHits(findOverlaps(tonsil_peaks, bcl6_gr)),]
bcl6_peaks_gw <- paste0("chr3", "-",
start(ranges(overlapping_bcl6_gw)), "-",
end(ranges(overlapping_bcl6_gw)))
enhancer_region_gw <- tonsil_peaks[seqnames(tonsil_peaks) == "chr3" &
start(tonsil_peaks) >= 187910000 &
end(tonsil_peaks) <= 188001000,]
enhancer_features_gw <- unique(paste0("chr3", "-",
start(ranges(enhancer_region_gw)), "-",
end(ranges(enhancer_region_gw))))
Finally, we can compute a combinatory score taking into account the peaks located at BCL6 gene level and the peaks located on the super enhancer region.
tonsil = AddModuleScore(
object = tonsil,
features = list(bcl6_peaks_gw),
name = 'BCL6_gene'
)
tonsil = AddModuleScore(
object = tonsil,
features = list(enhancer_features_gw),
name = 'BCL6_enhancer'
)
bcl6_umap <- FeaturePlot(
tonsil,
min.cutoff = "q5",
max.cutoff = "q95",
raster = F,
features = "BCL6_gene1",
pt.size = 0.3)
bcl6_enhancer_umap <- FeaturePlot(
tonsil,
min.cutoff = "q5",
max.cutoff = "q95",
raster = F,
features = "BCL6_enhancer1",
pt.size = 0.3)
#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/entire_tonsil_umap_enhancer.pdf"),
# width = 10,
# height = 6)
join = bcl6_umap | bcl6_enhancer_umap
print(join)
overlapping <- links_T[queryHits(findOverlaps(links_T, prdm1_gr)),]
overlapping_peaks <- seurat_peaks[queryHits(findOverlaps(seurat_peaks,
overlapping)),]
features <- paste0(seqnames(overlapping_peaks),"-",
start(overlapping_peaks),"-",
end(overlapping_peaks))
region_plot <- paste0(unique(seqnames(overlapping_peaks)),"-",
min(start(overlapping_peaks)),"-",
max(end(overlapping_peaks)))
#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/prdm1_coverage_plot.pdf"),
# width = 10,
# height = 6)
CoveragePlot(object = seurat, group = "annotation_paper", region = region_plot)
mat_heatmap(seurat = seurat,
features = features,
group = "Group",
cutree_ncols = 2,
cutree_nrows = 1)
mat_heatmap(seurat = seurat,
features = features,
group = "annotation_paper",
cutree_ncols = 2,cutree_nrows = 1)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plyr_1.8.6 ggpubr_0.4.0 data.table_1.14.0 forcats_0.5.0 stringr_1.4.0 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 tidyverse_1.3.0 ggplot2_3.3.5 dplyr_1.0.7 pheatmap_1.0.12 GenomicRanges_1.40.0 GenomeInfoDb_1.24.2 IRanges_2.22.1 S4Vectors_0.26.0 BiocGenerics_0.34.0 Signac_1.2.1 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.1 reticulate_1.20 tidyselect_1.1.1 htmlwidgets_1.5.3 grid_4.0.3 docopt_0.7.1 BiocParallel_1.22.0 Rtsne_0.15 munsell_0.5.0 codetools_0.2-17 ica_1.0-2 DT_0.16 future_1.21.0 miniUI_0.1.1.1 withr_2.4.2 colorspace_2.0-2 Biobase_2.48.0 knitr_1.30 rstudioapi_0.11 ROCR_1.0-11 ggsignif_0.6.0 tensor_1.5 listenv_0.8.0 labeling_0.4.2 slam_0.1-47 GenomeInfoDbData_1.2.3 polyclip_1.10-0 farver_2.1.0 rprojroot_2.0.2 parallelly_1.26.1 vctrs_0.3.8 generics_0.1.0 xfun_0.18 lsa_0.73.2 ggseqlogo_0.1 R6_2.5.0 DelayedArray_0.14.0 bitops_1.0-7 spatstat.utils_2.2-0 assertthat_0.2.1 promises_1.2.0.1 scales_1.1.1
## [43] gtable_0.3.0 globals_0.14.0 goftest_1.2-2 rlang_0.4.11 RcppRoll_0.3.0 splines_4.0.3 rtracklayer_1.48.0 rstatix_0.6.0 lazyeval_0.2.2 spatstat.geom_2.2-0 broom_0.7.2 BiocManager_1.30.10 yaml_2.2.1 reshape2_1.4.4 abind_1.4-5 modelr_0.1.8 crosstalk_1.1.1 backports_1.1.10 httpuv_1.6.1 tools_4.0.3 bookdown_0.21 ellipsis_0.3.2 spatstat.core_2.2-0 RColorBrewer_1.1-2 ggridges_0.5.3 Rcpp_1.0.6 zlibbioc_1.34.0 RCurl_1.98-1.2 rpart_4.1-15 deldir_0.2-10 pbapply_1.4-3 cowplot_1.1.1 zoo_1.8-9 SummarizedExperiment_1.18.1 haven_2.3.1 ggrepel_0.9.1 cluster_2.1.0 fs_1.5.0 here_1.0.1 magrittr_2.0.1 scattermore_0.7 openxlsx_4.2.4
## [85] lmtest_0.9-38 reprex_0.3.0 RANN_2.6.1 SnowballC_0.7.0 fitdistrplus_1.1-5 matrixStats_0.59.0 hms_0.5.3 patchwork_1.1.1 mime_0.11 evaluate_0.14 xtable_1.8-4 XML_3.99-0.3 rio_0.5.16 sparsesvd_0.2 readxl_1.3.1 gridExtra_2.3 compiler_4.0.3 KernSmooth_2.23-17 crayon_1.4.1 htmltools_0.5.1.1 mgcv_1.8-33 later_1.2.0 lubridate_1.7.9 DBI_1.1.0 tweenr_1.0.1 dbplyr_1.4.4 MASS_7.3-53 Matrix_1.3-4 car_3.0-10 cli_3.0.0 igraph_1.2.6 pkgconfig_2.0.3 GenomicAlignments_1.24.0 foreign_0.8-80 plotly_4.9.4.1 spatstat.sparse_2.0-0 xml2_1.3.2 XVector_0.28.0 rvest_0.3.6 digest_0.6.27 sctransform_0.3.2 RcppAnnoy_0.0.18
## [127] spatstat.data_2.1-0 Biostrings_2.56.0 rmarkdown_2.5 cellranger_1.1.0 leiden_0.3.8 fastmatch_1.1-0 uwot_0.1.10 curl_4.3.2 shiny_1.6.0 Rsamtools_2.4.0 lifecycle_1.0.0 nlme_3.1-150 jsonlite_1.7.2 carData_3.0-4 viridisLite_0.4.0 fansi_0.5.0 pillar_1.6.1 lattice_0.20-41 fastmap_1.1.0 httr_1.4.2 survival_3.2-7 glue_1.4.2 qlcMatrix_0.9.7 zip_2.1.1 png_0.1-7 ggforce_0.3.2 stringi_1.6.2 blob_1.2.1 irlba_2.3.3 future.apply_1.7.0